/****************************************************************************
*
*    Copyright (c) 2005 - 2014 by Vivante Corp.  All rights reserved.
*
*    The material in this file is confidential and contains trade secrets
*    of Vivante Corporation. This is proprietary information owned by
*    Vivante Corporation. No part of this work may be disclosed,
*    reproduced, copied, transmitted, or used in any way for any purpose,
*    without the express written permission of Vivante Corporation.
*
*****************************************************************************/


#include <stdlib.h>
#include <memory.h>
#include "tiny_ui.h"
#include "tiny_ui_kernel.h"

#define TINY_UI_COMMAND_BUFFER_SIZE 4096

// Evaluates `func` once, stores the result in the caller's local variable
// `error`, and returns it from the enclosing function when it is not
// TINY_UI_SUCCESS. The enclosing function must declare `error`.
// Wrapped in do { } while (0) so the macro behaves as a single statement and
// cannot capture an `else` that follows it.
#define TINY_UI_RETURN_ERROR(func) \
    do { \
        if ((error = (func)) != TINY_UI_SUCCESS) \
            return error; \
    } while (0)

// *** Command macros ***
//
// Each macro builds the first 32-bit word of a command by OR-ing an opcode
// constant with its operand. Operands are parenthesized so that expressions
// with lower precedence than `|` (for example a conditional expression) are
// combined as a whole.

#define TINY_UI_END(interrupt)  (0x00000000 | (interrupt))
#define TINY_UI_SEMAPHORE(id)   (0x10000000 | (id))
#define TINY_UI_STALL(id)       (0x20000000 | (id))
#define TINY_UI_STATE(address)  (0x30010000 | (address))
#define TINY_UI_DATA(count)     (0x40000000 | (count))
#define TINY_UI_CALL(count)     (0x60000000 | (count))
#define TINY_UI_RETURN()        (0x70000000)


// Temporary profiling stuff added by Davor.
// Holds buffer sizes and high-water marks for two command buffers.
// NOTE(review): nothing else in this file reads or writes GPU_State; the
// "RCMD"/"VCMD" prefixes presumably name two distinct command buffers --
// confirm against the code that fills this in.
typedef struct {
  uint32_t      RCMDbufferSize;     // size of the "RCMD" buffer
  uint32_t      RCMDbufferHighWM;   // high-water mark of the "RCMD" buffer
  uint32_t      VCMDbufferSize;     // size of the "VCMD" buffer
  uint32_t      VCMDbufferHighWM;   // high-water mark of the "VCMD" buffer
}GPU_State_t;

// Global profiling record (external linkage).
GPU_State_t GPU_State;

// Per-pipeline driver state: the kernel context handle, the capabilities
// reported at initialization, the command buffer being filled, and (for the
// draw pipeline) the tessellation buffer description.
typedef struct tiny_ui_context {
    tiny_ui_kernel_context_t * context;     // kernel context returned by TINY_UI_INITIALIZE
    tiny_ui_capabilities_t capabilities;    // hardware capabilities returned by TINY_UI_INITIALIZE
    unsigned char * command_buffer;         // start of the command buffer; NULL when uninitialized
    uint32_t command_buffer_size;           // capacity of command_buffer in bytes
    uint32_t command_offset;                // byte offset of the next command to be written
    uint32_t tessellation_buffer_gpu[3];    // tessellation buffer addresses written to GPU state registers
    uint32_t tessellation_buffer_size[3];   // sizes matching tessellation_buffer_gpu
    uint32_t tessellation_stride;           // tessellation buffer stride as reported by the kernel
    uint32_t tessellation_width_height;     // packed tile size: width in bits 0-15, height in bits 16-31
}
tiny_ui_context_t;

// Context used by tiny_ui_clear() and tiny_ui_blit().
tiny_ui_context_t s_blit_context;
// Context used by tiny_ui_draw().
tiny_ui_context_t s_draw_context;

// Integer 2D point; transform() stores its converted result in this type.
typedef struct tiny_ui_point {
    int x;  // horizontal coordinate
    int y;  // vertical coordinate
}
tiny_ui_point_t;

// Converts a path coordinate format into the bit field that tiny_ui_draw()
// ORs into state 0x0A34.
//
// Returns 0 (the same encoding as TINY_UI_S8) for any value not listed, so
// the function never falls off its end without returning a value.
static uint32_t convert_path_format(tiny_ui_format_t format)
{
    switch (format) {
        case TINY_UI_S8:
            return 0;

        case TINY_UI_S16:
            return 0x100000;

        case TINY_UI_S32:
            return 0x200000;

        case TINY_UI_FP32:
            return 0x300000;

        default:
            // Unknown format: previously undefined behavior.
            return 0;
    }
}

// Maps a path quality setting to the bits tiny_ui_draw() ORs into state
// 0x0A34: 0x3 for TINY_UI_HIGH, 0x1 for TINY_UI_MEDIUM, 0x0 for anything else.
static uint32_t convert_path_quality(tiny_ui_quality_t quality)
{
    if (quality == TINY_UI_HIGH) {
        return 0x3;
    }

    if (quality == TINY_UI_MEDIUM) {
        return 0x1;
    }

    return 0x0;
}

// Collapses a packed 32-bit color into a single luminance value.
//
// The three low bytes are weighted 0.2126 (bits 0-7), 0.7152 (bits 8-15) and
// 0.0722 (bits 16-23); bits 24-31 of the input are ignored. The truncated sum
// is returned in both bits 0-7 and bits 24-31 of the result.
static uint32_t rgb_to_l(uint32_t color)
{
    uint32_t byte0 = color & 0xFF;
    uint32_t byte1 = (color >> 8) & 0xFF;
    uint32_t byte2 = (color >> 16) & 0xFF;

    // The middle weight is deliberately kept as a double constant so the sum
    // is evaluated with the same mix of float and double as before.
    uint32_t luminance = 0.2126f * byte0 + 0.7152 * byte1 + 0.0722f * byte2;

    return luminance | (luminance << 24);
}

// Returns the number of bytes per pixel for a buffer format.
//
// Returns 0 for any value not listed, so the function never falls off its
// end without returning a value. Callers that derive a stride from the result
// then get a zero stride for an unknown format.
static uint32_t get_format_bytes(tiny_ui_buffer_format_t format)
{
    switch (format) {
        case TINY_UI_L8:
        case TINY_UI_A8:
            return 1;

        case TINY_UI_RGBA4444:
        case TINY_UI_BGRA4444:
        case TINY_UI_RGB565:
        case TINY_UI_BGR565:
        case TINY_UI_YUYV:
            return 2;

        case TINY_UI_RGBA8888:
        case TINY_UI_BGRA8888:
            return 4;

        default:
            // Unknown format: previously undefined behavior.
            return 0;
    }
}

// Converts a buffer format into the target-format code that the callers OR
// into state 0x0A10.
//
// TINY_UI_L8 maps to 6 only when caps.l8 is set; otherwise it uses the same
// code as TINY_UI_A8 (0). Any value not listed also returns 0, so the
// function never falls off its end without returning a value.
static uint32_t convert_target_format(tiny_ui_buffer_format_t format, tiny_ui_capabilities_t caps)
{
    switch (format) {
        case TINY_UI_A8:
            return 0;

        case TINY_UI_L8:
            return caps.l8 ? 6 : 0;

        case TINY_UI_RGBA4444:
            return 36;

        case TINY_UI_BGRA4444:
            return 4;

        case TINY_UI_RGB565:
            return 33;

        case TINY_UI_BGR565:
            return 1;

        case TINY_UI_RGBA8888:
            return 35;

        case TINY_UI_BGRA8888:
            return 3;

        case TINY_UI_YUYV:
            return 7;

        default:
            // Unknown format: previously undefined behavior.
            return 0;
    }
}

// Converts a buffer format into the source-format code that tiny_ui_blit()
// ORs into state 0x0A25.
//
// Any value not listed returns 0 (the same code as TINY_UI_L8), so the
// function never falls off its end without returning a value.
static uint32_t convert_source_format(tiny_ui_buffer_format_t format)
{
    switch (format) {
        case TINY_UI_L8:
            return 0;

        case TINY_UI_A8:
            return 2;

        case TINY_UI_RGBA4444:
            return 35;

        case TINY_UI_BGRA4444:
            return 3;

        case TINY_UI_RGB565:
            return 37;

        case TINY_UI_BGR565:
            return 5;

        case TINY_UI_RGBA8888:
            return 39;

        case TINY_UI_BGRA8888:
            return 7;

        case TINY_UI_YUYV:
            return 8;

        default:
            // Unknown format: previously undefined behavior.
            return 0;
    }
}

// Translates a blend mode into the bit field that the callers OR into state
// 0x0A00. Modes without an entry below yield 0.
static uint32_t convert_blend(tiny_ui_blend_t blend)
{
    uint32_t bits = 0;

    switch (blend) {
        case TINY_UI_BLEND_SRC_OVER: bits = 0x00000100; break;
        case TINY_UI_BLEND_DST_OVER: bits = 0x00000200; break;
        case TINY_UI_BLEND_SRC_IN:   bits = 0x00000300; break;
        case TINY_UI_BLEND_DST_IN:   bits = 0x00000400; break;
        case TINY_UI_BLEND_MULTIPLY: bits = 0x00000500; break;
        case TINY_UI_BLEND_SCREEN:   bits = 0x00000600; break;
        case TINY_UI_BLEND_ADDITIVE: bits = 0x00000900; break;
        case TINY_UI_BLEND_SUBTRACT: bits = 0x00000A00; break;
        default:                     break;
    }

    return bits;
}

// Appends one 8-byte STATE command (state address word followed by the data
// word) to the context's command buffer.
//
// Returns TINY_UI_NO_CONTEXT when no command buffer is attached. When the
// command does not fit (a command that would exactly fill the buffer is also
// rejected), the write offset is reset to 0 and TINY_UI_OUT_OF_RESOURCES is
// returned.
static tiny_ui_error_t push_state(tiny_ui_context_t * context, uint32_t address, uint32_t data)
{
    uint32_t * words;

    if (context->command_buffer == NULL)
        return TINY_UI_NO_CONTEXT;

    if (context->command_offset + 8 >= context->command_buffer_size) {
        context->command_offset = 0;
        return TINY_UI_OUT_OF_RESOURCES;
    }

    words = (uint32_t *) (context->command_buffer + context->command_offset);
    words[0] = TINY_UI_STATE(address);
    words[1] = data;

    context->command_offset += 8;
    return TINY_UI_SUCCESS;
}

// Appends one 8-byte CALL command to the context's command buffer. The first
// word carries `bytes` rounded up to a count of 8-byte units; the second word
// carries `address`.
//
// Returns TINY_UI_NO_CONTEXT when no command buffer is attached. When the
// command does not fit, the write offset is reset to 0 and
// TINY_UI_OUT_OF_RESOURCES is returned.
static tiny_ui_error_t push_call(tiny_ui_context_t * context, uint32_t address, uint32_t bytes)
{
    uint32_t * words;

    if (context->command_buffer == NULL)
        return TINY_UI_NO_CONTEXT;

    if (context->command_offset + 8 >= context->command_buffer_size) {
        context->command_offset = 0;
        return TINY_UI_OUT_OF_RESOURCES;
    }

    words = (uint32_t *) (context->command_buffer + context->command_offset);
    words[0] = TINY_UI_CALL((bytes + 7) / 8);
    words[1] = address;

    context->command_offset += 8;
    return TINY_UI_SUCCESS;
}

// Appends a 16-byte DATA command holding one rectangle to the context's
// command buffer. The first word is TINY_UI_DATA(1); x, y, width and height
// are stored as four consecutive 16-bit values starting at byte 8, so each
// value is truncated to 16 bits. Bytes 4-7 of the command are not written.
//
// Returns TINY_UI_NO_CONTEXT when no command buffer is attached. When the
// command does not fit, the write offset is reset to 0 and
// TINY_UI_OUT_OF_RESOURCES is returned. Returns TINY_UI_SUCCESS otherwise
// (previously a literal 0, which only matched the error macro's comparison
// if TINY_UI_SUCCESS happened to be 0).
static tiny_ui_error_t push_rectangle(tiny_ui_context_t * context, int x, int y, int width, int height)
{
    unsigned char * command;

    if (context->command_buffer == NULL)
        return TINY_UI_NO_CONTEXT;

    if (context->command_offset + 16 >= context->command_buffer_size) {
        context->command_offset = 0;
        return TINY_UI_OUT_OF_RESOURCES;
    }

    command = context->command_buffer + context->command_offset;
    ((uint32_t *) command)[0] = TINY_UI_DATA(1);
    ((uint16_t *) command)[4] = x;
    ((uint16_t *) command)[5] = y;
    ((uint16_t *) command)[6] = width;
    ((uint16_t *) command)[7] = height;
    context->command_offset += 16;

    return TINY_UI_SUCCESS;
}

// Appends a DATA command followed by `size` bytes copied from `data` to the
// context's command buffer. The payload is padded to TINY_UI_ALIGN(size, 8)
// bytes and the padding is zero-filled so no stale buffer contents are
// submitted.
//
// Returns TINY_UI_NO_CONTEXT when no command buffer is attached and
// TINY_UI_INVALID_ARGUMENT for a negative size or a NULL `data` with a
// non-zero size. A negative size previously wrapped the unsigned bounds check
// and reached memcpy(). When the command does not fit, the write offset is
// reset to 0 and TINY_UI_OUT_OF_RESOURCES is returned.
static tiny_ui_error_t push_data(tiny_ui_context_t * context, int size, void * data)
{
    unsigned char * command;
    int bytes;

    if (context->command_buffer == NULL)
        return TINY_UI_NO_CONTEXT;

    if (size < 0 || (size > 0 && data == NULL))
        return TINY_UI_INVALID_ARGUMENT;

    // Reject oversized payloads before aligning so the alignment and the
    // offset arithmetic below cannot overflow.
    if ((uint32_t) size >= context->command_buffer_size) {
        context->command_offset = 0;
        return TINY_UI_OUT_OF_RESOURCES;
    }

    bytes = TINY_UI_ALIGN(size, 8);

    if (context->command_offset + 8 + bytes >= context->command_buffer_size) {
        context->command_offset = 0;
        return TINY_UI_OUT_OF_RESOURCES;
    }

    command = context->command_buffer + context->command_offset;
    ((uint32_t *) command)[0] = TINY_UI_DATA(bytes / 8);
    if (size > 0)
        memcpy(command + 8, data, size);
    if (bytes > size)
        memset(command + 8 + size, 0, bytes - size);
    context->command_offset += 8 + bytes;

    return TINY_UI_SUCCESS;
}

// Appends a 16-byte SEMAPHORE + STALL pair for `module` to the context's
// command buffer: the SEMAPHORE word goes at byte 0 and the STALL word at
// byte 8. The words at bytes 4 and 12 are not written.
//
// Returns TINY_UI_NO_CONTEXT when no command buffer is attached. When the
// commands do not fit, the write offset is reset to 0 and
// TINY_UI_OUT_OF_RESOURCES is returned. Returns TINY_UI_SUCCESS otherwise
// (previously a literal 0).
static tiny_ui_error_t push_stall(tiny_ui_context_t * context, uint32_t module)
{
    uint32_t * words;

    if (context->command_buffer == NULL)
        return TINY_UI_NO_CONTEXT;

    if (context->command_offset + 16 >= context->command_buffer_size) {
        context->command_offset = 0;
        return TINY_UI_OUT_OF_RESOURCES;
    }

    words = (uint32_t *) (context->command_buffer + context->command_offset);
    words[0] = TINY_UI_SEMAPHORE(module);
    words[2] = TINY_UI_STALL(module);
    context->command_offset += 16;

    return TINY_UI_SUCCESS;
}

// Terminates the pending command stream with an END command, submits it to
// the kernel and waits (without timeout) for the GPU to finish.
//
// Returns TINY_UI_NO_CONTEXT when the context or its command buffer is
// missing, TINY_UI_SUCCESS immediately when nothing is pending,
// TINY_UI_OUT_OF_RESOURCES when the END command does not fit, or the error
// reported by the kernel submit/wait call.
//
// The write offset is reset on every path that reaches the submit call,
// including a failed submit. Previously a failed submit left the offset
// pointing past the END command, so later commands were appended behind a
// stale, already terminated stream.
static tiny_ui_error_t commit(tiny_ui_context_t * context)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_submit_t submit;
    tiny_ui_kernel_wait_t wait;

    // Check if there is a valid context and an allocated command buffer.
    if ((context->context == NULL) || (context->command_buffer == NULL))
        return TINY_UI_NO_CONTEXT;

    // Check if there is anything to submit.
    if (context->command_offset == 0)
        return TINY_UI_SUCCESS;

    // Check if there is enough space in the command buffer for the END.
    if (context->command_offset + 8 > context->command_buffer_size) {
        // Reset command buffer offset.
        context->command_offset = 0;
        return TINY_UI_OUT_OF_RESOURCES;
    }

    // Append END command into the command buffer.
    ((uint32_t *) (context->command_buffer + context->command_offset))[0] = TINY_UI_END(0);
    context->command_offset += 8;

    // Describe the command stream to submit.
    submit.context = context->context;
    submit.commands = context->command_buffer;
    submit.command_size = context->command_offset;

    // Reset the command buffer before submitting; the size was captured
    // above, so the buffer is empty again whether or not the submit succeeds.
    context->command_offset = 0;

    // Submit the command buffer.
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_SUBMIT, &submit));

    // Wait until GPU is ready.
    wait.context = context->context;
    wait.timeout_ms = TINY_UI_INFINITE;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_WAIT, &wait));

    // Success.
    return TINY_UI_SUCCESS;
}

// Computes the inverse of a 3x3 matrix into `result`.
//
// A NULL `matrix` is treated as the identity, so `result` receives the
// identity matrix. When the input's bottom row is exactly (0, 0, 1) the
// result's bottom row is forced to exactly (0, 0, 1) as well.
//
// Returns 1 on success, or 0 when the determinant is exactly zero (in which
// case `result` is left untouched).
static int inverse(tiny_ui_matrix_t * result, tiny_ui_matrix_t * matrix)
{
    tiny_ui_float_t cofactor0, cofactor1, cofactor2;
    tiny_ui_float_t scale;
    int affine;
    int row, col;

    // No matrix means identity.
    if (matrix == NULL) {
        for (row = 0; row < 3; row++) {
            for (col = 0; col < 3; col++) {
                result->m[row][col] = (row == col) ? 1.0f : 0.0f;
            }
        }
        return 1;
    }

    // Cofactors along the first row.
    cofactor0 = matrix->m[1][1] * matrix->m[2][2] - matrix->m[2][1] * matrix->m[1][2];
    cofactor1 = matrix->m[2][0] * matrix->m[1][2] - matrix->m[1][0] * matrix->m[2][2];
    cofactor2 = matrix->m[1][0] * matrix->m[2][1] - matrix->m[2][0] * matrix->m[1][1];

    // Determinant by expansion along the first row.
    scale = matrix->m[0][0] * cofactor0 + matrix->m[0][1] * cofactor1 + matrix->m[0][2] * cofactor2;

    // A zero determinant means the matrix cannot be inverted.
    if (scale == 0.0f)
        return 0;

    // From here on `scale` holds the reciprocal of the determinant.
    scale = 1.0f / scale;

    affine = (matrix->m[2][0] == 0.0f) && (matrix->m[2][1] == 0.0f) && (matrix->m[2][2] == 1.0f);

    // First two rows of the scaled adjugate.
    result->m[0][0] = scale * cofactor0;
    result->m[0][1] = scale * (matrix->m[2][1] * matrix->m[0][2] - matrix->m[0][1] * matrix->m[2][2]);
    result->m[0][2] = scale * (matrix->m[0][1] * matrix->m[1][2] - matrix->m[1][1] * matrix->m[0][2]);
    result->m[1][0] = scale * cofactor1;
    result->m[1][1] = scale * (matrix->m[0][0] * matrix->m[2][2] - matrix->m[2][0] * matrix->m[0][2]);
    result->m[1][2] = scale * (matrix->m[1][0] * matrix->m[0][2] - matrix->m[0][0] * matrix->m[1][2]);

    // Bottom row: exact constants for an affine input, computed otherwise.
    if (affine) {
        result->m[2][0] = 0.0f;
        result->m[2][1] = 0.0f;
        result->m[2][2] = 1.0f;
    }
    else {
        result->m[2][0] = scale * cofactor2;
        result->m[2][1] = scale * (matrix->m[2][0] * matrix->m[0][1] - matrix->m[0][0] * matrix->m[2][1]);
        result->m[2][2] = scale * (matrix->m[0][0] * matrix->m[1][1] - matrix->m[1][0] * matrix->m[0][1]);
    }

    return 1;
}

// Applies a 3x3 matrix to the point (x, y) and stores the projected result
// as integers in `result`.
//
// With a NULL `matrix` the coordinates are stored as-is (implicit conversion
// to int, no rounding offset). Otherwise the projected coordinates get 0.5
// added before the implicit conversion to int.
//
// Returns 1 on success, or 0 when the projected w component is not positive
// (in which case `result` is left untouched).
static int transform(tiny_ui_point_t * result, tiny_ui_float_t x, tiny_ui_float_t y, tiny_ui_matrix_t * matrix)
{
    tiny_ui_float_t hx, hy, hw;

    if (matrix != NULL) {
        // Homogeneous coordinates of the transformed point.
        hx = x * matrix->m[0][0] + y * matrix->m[0][1] + matrix->m[0][2];
        hy = x * matrix->m[1][0] + y * matrix->m[1][1] + matrix->m[1][2];
        hw = x * matrix->m[2][0] + y * matrix->m[2][1] + matrix->m[2][2];

        if (hw <= 0.0f)
            return 0;

        // Perspective divide.
        result->x = hx / hw + 0.5f;
        result->y = hy / hw + 0.5f;
        return 1;
    }

    // No matrix means identity.
    result->x = x;
    result->y = y;
    return 1;
}

// Initializes the blitter context: asks the kernel for a context with a
// TINY_UI_COMMAND_BUFFER_SIZE command buffer and no tessellation buffer, then
// records the result in s_blit_context.
//
// Returns the kernel's error code on failure (s_blit_context is then left
// unchanged), TINY_UI_SUCCESS otherwise.
tiny_ui_error_t tiny_ui_blit_init(void)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_initialize_t request;

    // The blitter needs no tessellation buffer.
    request.tessellation_width = 0;
    request.tessellation_height = 0;
    request.command_buffer_size = TINY_UI_COMMAND_BUFFER_SIZE;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_INITIALIZE, &request));

    // Record what the kernel handed back and start with an empty buffer.
    s_blit_context.command_offset = 0;
    s_blit_context.command_buffer_size = request.command_buffer_size;
    s_blit_context.command_buffer = request.command_buffer;
    s_blit_context.capabilities = request.capabilities;
    s_blit_context.context = request.context;

    return TINY_UI_SUCCESS;
}

// Fills a rectangle of `buffer` with `color` using the blitter context.
//
// A NULL `rectangle` selects the whole buffer (origin 0,0 with the buffer's
// width and height). For TINY_UI_L8 buffers the color is first converted
// with rgb_to_l(). The command stream is submitted and waited on before the
// function returns.
//
// Returns the first error reported while building or committing the command
// stream, TINY_UI_SUCCESS otherwise.
tiny_ui_error_t tiny_ui_clear(tiny_ui_buffer_t * buffer,
                              tiny_ui_rectangle_t * rectangle,
                              tiny_ui_color_t color)
{
    tiny_ui_error_t error;
    int left, top, span_x, span_y;
    uint32_t fill_color;

    // Pick the area to clear.
    if (rectangle != NULL) {
        left   = rectangle->x;
        top    = rectangle->y;
        span_x = rectangle->width;
        span_y = rectangle->height;
    }
    else {
        left   = 0;
        top    = 0;
        span_x = buffer->width;
        span_y = buffer->height;
    }

    // L8 targets take a luminance value instead of the raw color.
    if (buffer->format == TINY_UI_L8) {
        fill_color = rgb_to_l(color);
    }
    else {
        fill_color = color;
    }

    // Build the command stream.
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A00, 0x00000001));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A02, fill_color));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A10,
                                    convert_target_format(buffer->format, s_blit_context.capabilities) | 0x00010000));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A11, buffer->address));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A12, buffer->stride));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A13, buffer->width | (buffer->height << 16)));
    TINY_UI_RETURN_ERROR(push_rectangle(&s_blit_context, left, top, span_x, span_y));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A1B, 0x00000001));
    TINY_UI_RETURN_ERROR(push_stall(&s_blit_context, 7));

    // Submit and wait.
    TINY_UI_RETURN_ERROR(commit(&s_blit_context));

    return TINY_UI_SUCCESS;
}

tiny_ui_error_t tiny_ui_blit(tiny_ui_buffer_t * target,
                             tiny_ui_buffer_t * source,
                             tiny_ui_matrix_t * matrix,
                             tiny_ui_blend_t blend,
                             tiny_ui_color_t color)
{
    tiny_ui_error_t error;
    tiny_ui_point_t point_min, point_max, temp;
    tiny_ui_matrix_t inverse_matrix;
    tiny_ui_float_t x_step[3];
    tiny_ui_float_t y_step[3];
    tiny_ui_float_t c_step[3];
    uint32_t imageMode;
    uint32_t blend_mode;
    tiny_ui_blend_t forced_blending = blend;
    int32_t rotation = 0;
    uint32_t conversion = 0;

    // Check if the specified matrix has rotation or perspective.
    if (   (matrix != NULL)
        && (   matrix->m[0][1] != 0.0f
            || matrix->m[1][0] != 0.0f
            || matrix->m[2][0] != 0.0f
            || matrix->m[2][1] != 0.0f
            || matrix->m[2][2] != 1.0f
            )
        ) {
        // Mark that we have rotation.
        rotation = 0x8000;

        if (!s_blit_context.capabilities.border_culling) {
            // Force blending to SRC_OVER to remove pixels outside the image.
            forced_blending = TINY_UI_BLEND_SRC_OVER;
        }
    }

    // If target is L8 and source is in YUV or RGB (not L8 or A8) then we have to convert RGB into L8.
    if ((target->format == TINY_UI_L8) && ((source->format != TINY_UI_L8) && (source->format != TINY_UI_A8))) {
        if (s_blit_context.capabilities.l8) {
            conversion = 0x80000000;
        } else {
            // TODO: Must convert source image to L8 in software.
        }
    }

    // Transform image (0,0) to screen.
    if (!transform(&temp, 0, 0, matrix))
        return TINY_UI_INVALID_ARGUMENT;

    // Set initial point.
    point_min = temp;
    point_max = temp;

    // Transform image (0,height) to screen.
    if (!transform(&temp, 0, source->height, matrix))
        return TINY_UI_INVALID_ARGUMENT;

    // Determine min/max.
    if (temp.x < point_min.x) point_min.x = temp.x;
    if (temp.y < point_min.y) point_min.y = temp.y;
    if (temp.x > point_max.x) point_max.x = temp.x;
    if (temp.y > point_max.y) point_max.y = temp.y;

    // Transform image (width,height) to screen.
    if (!transform(&temp, source->width, source->height, matrix))
        return TINY_UI_INVALID_ARGUMENT;

    // Determine min/max.
    if (temp.x < point_min.x) point_min.x = temp.x;
    if (temp.y < point_min.y) point_min.y = temp.y;
    if (temp.x > point_max.x) point_max.x = temp.x;
    if (temp.y > point_max.y) point_max.y = temp.y;

    // Transform image (width,0) to screen.
    if (!transform(&temp, source->width, 0, matrix))
        return TINY_UI_INVALID_ARGUMENT;

    // Determine min/max.
    if (temp.x < point_min.x) point_min.x = temp.x;
    if (temp.y < point_min.y) point_min.y = temp.y;
    if (temp.x > point_max.x) point_max.x = temp.x;
    if (temp.y > point_max.y) point_max.y = temp.y;

    // Clip to target.
    if (point_min.x < 0) point_min.x = 0;
    if (point_min.y < 0) point_min.y = 0;
    if (point_max.x > target->width) point_max.x = target->width;
    if (point_max.y > target->height) point_max.y = target->height;

    // Compute inverse matrix.
    if (!inverse(&inverse_matrix, matrix))
        return TINY_UI_INVALID_ARGUMENT;

    // Compute interpolation steps.
    x_step[0] = inverse_matrix.m[0][0] / source->width;
    x_step[1] = inverse_matrix.m[1][0] / source->height;
    x_step[2] = inverse_matrix.m[2][0];
    y_step[0] = inverse_matrix.m[0][1] / source->width;
    y_step[1] = inverse_matrix.m[1][1] / source->height;
    y_step[2] = inverse_matrix.m[2][1];
    c_step[0] = inverse_matrix.m[0][2] / source->width;
    c_step[1] = inverse_matrix.m[1][2] / source->height;
    c_step[2] = inverse_matrix.m[2][2];

    // Determine image mode (NORMAL or MULTIPLY) depending on the color.
    imageMode = (color == 0) ? 0x00001000 : 0x00002000;
    blend_mode = convert_blend(forced_blending);

    // Setup the command buffer.
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A00, 0x00000001 | imageMode | blend_mode | rotation));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A02, color));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A10, convert_target_format(target->format,
                                                                                   s_blit_context.capabilities) | 0x00010000));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A11, target->address));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A12, target->stride));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A13, target->width | (target->height << 16)));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A18, *(uint32_t *) &c_step[0]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A19, *(uint32_t *) &c_step[1]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A1A, *(uint32_t *) &c_step[2]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A1C, *(uint32_t *) &x_step[0]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A1D, *(uint32_t *) &x_step[1]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A1E, *(uint32_t *) &x_step[2]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A1F, 0x00000001));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A20, *(uint32_t *) &y_step[0]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A21, *(uint32_t *) &y_step[1]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A22, *(uint32_t *) &y_step[2]));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A25, convert_source_format(source->format) | 0x00020000 | conversion));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A27, 0));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A29, source->address));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A2B, source->stride));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A2D, 0));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A2F, source->width | (source->height << 16)));
    TINY_UI_RETURN_ERROR(push_rectangle(&s_blit_context, point_min.x, point_min.y, point_max.x - point_min.x,
                                        point_max.y - point_min.y));
    TINY_UI_RETURN_ERROR(push_state(&s_blit_context, 0x0A1B, 0x00000001));
    TINY_UI_RETURN_ERROR(push_stall(&s_blit_context, 7));

    // Commit.
    TINY_UI_RETURN_ERROR(commit(&s_blit_context));

    // Return error.
    return error;
}

// Shuts down the blitter: asks the kernel to terminate the blitter's kernel
// context and then zeroes s_blit_context.
//
// Returns the kernel's error code on failure (s_blit_context is then left
// as it was), TINY_UI_SUCCESS otherwise.
tiny_ui_error_t tiny_ui_blit_close(void)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_terminate_t request;

    request.context = s_blit_context.context;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_TERMINATE, &request));

    // Forget everything about the terminated context.
    memset(&s_blit_context, 0, sizeof(s_blit_context));

    return TINY_UI_SUCCESS;
}

// Initializes the draw context: asks the kernel for a context with a
// TINY_UI_COMMAND_BUFFER_SIZE command buffer and a tessellation buffer of
// the requested dimensions, then records the result in s_draw_context.
//
// Returns the kernel's error code on failure (s_draw_context is then left
// unchanged), TINY_UI_SUCCESS otherwise.
tiny_ui_error_t tiny_ui_draw_init(int32_t tessellation_width,
                                  int32_t tessellation_height)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_initialize_t request;
    int slot;

    request.tessellation_width = tessellation_width;
    request.tessellation_height = tessellation_height;
    request.command_buffer_size = TINY_UI_COMMAND_BUFFER_SIZE;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_INITIALIZE, &request));

    // Command buffer and capabilities; the buffer starts out empty.
    s_draw_context.context = request.context;
    s_draw_context.capabilities = request.capabilities;
    s_draw_context.command_buffer = request.command_buffer;
    s_draw_context.command_buffer_size = request.command_buffer_size;
    s_draw_context.command_offset = 0;

    // Tessellation buffer description (three address/size pairs).
    for (slot = 0; slot < 3; slot++) {
        s_draw_context.tessellation_buffer_gpu[slot] = request.tessellation_buffer_gpu[slot];
        s_draw_context.tessellation_buffer_size[slot] = request.tessellation_buffer_size[slot];
    }
    s_draw_context.tessellation_stride = request.tessellation_stride;
    s_draw_context.tessellation_width_height = request.tessellation_width_height;

    return TINY_UI_SUCCESS;
}

tiny_ui_error_t tiny_ui_draw(tiny_ui_buffer_t * target,
                             tiny_ui_path_t * path,
                             tiny_ui_fill_t fill_rule,
                             tiny_ui_matrix_t * matrix,
                             tiny_ui_blend_t blend,
                             tiny_ui_color_t color)
{
    uint32_t color32;
    tiny_ui_point_t point_min, point_max, temp;
    uint32_t blend_mode;
    uint32_t format, quality, tiling, fill;
    uint32_t tessellation_size;
    tiny_ui_error_t error;
    int first;
    int x, y, width, height;
    tiny_ui_kernel_allocate_t memory;
    tiny_ui_kernel_free_t free_memory;
    uint32_t return_offset;

    color32 = (target->format == TINY_UI_L8) ? rgb_to_l(color) : color;

    transform(&temp, path->bounding_box[0], path->bounding_box[1], matrix);
    point_min = point_max = temp;

    transform(&temp, path->bounding_box[2], path->bounding_box[1], matrix);
    if (temp.x < point_min.x) point_min.x = temp.x;
    if (temp.y < point_min.y) point_min.y = temp.y;
    if (temp.x > point_max.x) point_max.x = temp.x;
    if (temp.y > point_max.y) point_max.y = temp.y;

    transform(&temp, path->bounding_box[2], path->bounding_box[3], matrix);
    if (temp.x < point_min.x) point_min.x = temp.x;
    if (temp.y < point_min.y) point_min.y = temp.y;
    if (temp.x > point_max.x) point_max.x = temp.x;
    if (temp.y > point_max.y) point_max.y = temp.y;

    transform(&temp, path->bounding_box[0], path->bounding_box[3], matrix);
    if (temp.x < point_min.x) point_min.x = temp.x;
    if (temp.y < point_min.y) point_min.y = temp.y;
    if (temp.x > point_max.x) point_max.x = temp.x;
    if (temp.y > point_max.y) point_max.y = temp.y;

    if (point_min.x < 0) point_min.x = 0;
    if (point_min.y < 0) point_min.y = 0;
    if (point_max.x > target->width) point_max.x = target->width;
    if (point_max.y > target->height) point_max.y = target->height;

    // Convert states into hardware.
    blend_mode = convert_blend(blend);
    format = convert_path_format(path->format);
    quality = convert_path_quality(path->quality);
    tiling = (s_draw_context.capabilities.tiled == 2) ? 0x2000000 : 0;
    fill = (fill_rule == TINY_UI_FILL_EVEN_ODD) ? 0x4 : 0;
    tessellation_size = (  s_draw_context.tessellation_buffer_size[2]
                         ? s_draw_context.tessellation_buffer_size[2]
                         : s_draw_context.tessellation_buffer_size[1]
                         );

    // Setup the command buffer.
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A00, s_draw_context.capabilities.tiled | blend_mode));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A02, color));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A10, convert_target_format(target->format,
                                                                                   s_draw_context.capabilities) | 0x00010000));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A11, target->address));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A12, target->stride));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A13, target->width | (target->height << 16)));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A30, s_draw_context.tessellation_buffer_gpu[0]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A31, s_draw_context.tessellation_buffer_gpu[1]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A32, s_draw_context.tessellation_buffer_gpu[2]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A33, s_draw_context.tessellation_stride));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A34, 0x01000010 | format | quality | tiling | fill));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A35, s_draw_context.tessellation_buffer_gpu[0]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A36, s_draw_context.tessellation_buffer_gpu[1]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A37, s_draw_context.tessellation_buffer_gpu[2]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A38, s_draw_context.tessellation_stride));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A3A, s_draw_context.tessellation_width_height));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A3B, 0x3F800000));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A3C, 0x00000000));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A40, *(uint32_t *) &matrix->m[0][0]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A41, *(uint32_t *) &matrix->m[0][1]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A42, *(uint32_t *) &matrix->m[0][2]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A43, *(uint32_t *) &matrix->m[1][0]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A44, *(uint32_t *) &matrix->m[1][1]));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A45, *(uint32_t *) &matrix->m[1][2]));

    if (path->uploaded == NULL) {
        // Allocate memory for the path data.
        memory.bytes = 16 + TINY_UI_ALIGN(path->path_length, 8);
        return_offset = (8 + TINY_UI_ALIGN(path->path_length, 8)) / 4;
        memory.contiguous = 1;
        TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_ALLOCATE, &memory));
        ((uint32_t *) memory.memory)[0] = TINY_UI_DATA((path->path_length + 7) / 8);
        memcpy((uint8_t *) memory.memory + 8, path->path, path->path_length);
        ((uint32_t *) memory.memory)[return_offset] = TINY_UI_RETURN();
    }

    // Setup tessellation loop.
    first = 1;
    width = s_draw_context.tessellation_width_height & 0xFFFF;
    height = s_draw_context.tessellation_width_height >> 16;
    for (y = point_min.y; y < point_max.y; y += height) {
        for (x = point_min.x; x < point_max.x; x += width) {
            // Tessellate path.
            if (first) {
                first = 0;
            }
            else {
                TINY_UI_RETURN_ERROR(push_stall(&s_draw_context, 15));
            }
            TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A1B, 0x00011000));
            TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A01, x | (y << 16)));
            TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A39, x | (y << 16)));
            TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A3D, tessellation_size / 64));
            if (path->uploaded == NULL) {
                TINY_UI_RETURN_ERROR(push_call(&s_draw_context, memory.memory_gpu, memory.bytes));
            }
            else {
                TINY_UI_RETURN_ERROR(push_call(&s_draw_context, path->uploaded->address, path->path_length));
            }
        }
    }

    // Finialize command buffer.
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A34, 0));
    TINY_UI_RETURN_ERROR(push_state(&s_draw_context, 0x0A1B, 0x00000001));
    TINY_UI_RETURN_ERROR(push_stall(&s_draw_context, 7));

    // Commit.
    TINY_UI_RETURN_ERROR(commit(&s_draw_context));

    if (path->uploaded == NULL) {
        // Free the path memory.
        free_memory.memory_handle = memory.memory_handle;
        TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_FREE, &free_memory));
    }

    // Success.
    return TINY_UI_SUCCESS;
}

// Shuts down the draw pipeline: asks the kernel to terminate the draw
// context and then zeroes s_draw_context.
//
// Returns the kernel's error code on failure (s_draw_context is then left
// as it was), TINY_UI_SUCCESS otherwise.
tiny_ui_error_t tiny_ui_draw_close(void)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_terminate_t request;

    request.context = s_draw_context.context;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_TERMINATE, &request));

    // Forget everything about the terminated context.
    memset(&s_draw_context, 0, sizeof(s_draw_context));

    return TINY_UI_SUCCESS;
}

// Allocates contiguous memory for `buffer` through the kernel.
//
// When buffer->stride is 0 it is first set to the row size (width times
// bytes per pixel) rounded up to a multiple of 4. The allocation size is
// stride * height. On success the buffer's handle, memory and address fields
// are filled in from the kernel's reply.
//
// Returns the kernel's error code on failure, TINY_UI_SUCCESS otherwise.
tiny_ui_error_t tiny_ui_allocate(tiny_ui_buffer_t * buffer)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_allocate_t request;

    // Derive a 32-bit aligned stride when the caller left it unset.
    if (buffer->stride == 0)
        buffer->stride = (buffer->width * get_format_bytes(buffer->format) + 3) & ~3;

    request.contiguous = 1;
    request.bytes = buffer->stride * buffer->height;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_ALLOCATE, &request));

    // Publish the allocation to the caller.
    buffer->address = request.memory_gpu;
    buffer->memory  = request.memory;
    buffer->handle  = request.memory_handle;

    return TINY_UI_SUCCESS;
}

// Releases the memory behind `buffer` through the kernel.
//
// Returns TINY_UI_INVALID_ARGUMENT when the buffer has no handle, or the
// kernel's error code on failure. On success the buffer's handle and memory
// pointers are cleared; its address field is left as it was.
tiny_ui_error_t tiny_ui_free(tiny_ui_buffer_t * buffer)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_free_t request;

    // Nothing to release without a handle.
    if (buffer->handle == NULL)
        return TINY_UI_INVALID_ARGUMENT;

    request.memory_handle = buffer->handle;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_FREE, &request));

    // The allocation is gone; drop the references to it.
    buffer->memory = NULL;
    buffer->handle = NULL;

    return TINY_UI_SUCCESS;
}

// Maps caller-provided memory for `buffer` through the kernel.
//
// The buffer must carry a logical pointer (memory) or a physical address
// (address); otherwise TINY_UI_INVALID_ARGUMENT is returned. When
// buffer->stride is 0 it is first set to the row size rounded up to a
// multiple of 4. On success the buffer's handle and address fields are
// replaced with the values from the kernel's reply.
//
// Returns the kernel's error code on failure, TINY_UI_SUCCESS otherwise.
tiny_ui_error_t tiny_ui_map(tiny_ui_buffer_t * buffer)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_map_t request;

    // There must be something to map.
    if ((buffer->memory == NULL) && (buffer->address == 0))
        return TINY_UI_INVALID_ARGUMENT;

    // Derive a 32-bit aligned stride when the caller left it unset.
    if (buffer->stride == 0)
        buffer->stride = (buffer->width * get_format_bytes(buffer->format) + 3) & ~3;

    request.physical = buffer->address;
    request.logical = buffer->memory;
    request.bytes = buffer->stride * buffer->height;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_MAP, &request));

    // Publish the mapping to the caller.
    buffer->address = request.memory_gpu;
    buffer->handle  = request.memory_handle;

    return TINY_UI_SUCCESS;
}

// Removes a mapping created for `buffer` through the kernel.
//
// Returns TINY_UI_INVALID_ARGUMENT when the buffer has no handle, or the
// kernel's error code on failure. On success only the buffer's handle is
// cleared; its memory and address fields are left as they were.
tiny_ui_error_t tiny_ui_unmap(tiny_ui_buffer_t * buffer)
{
    tiny_ui_error_t error;
    tiny_ui_kernel_unmap_t request;

    // Nothing to unmap without a handle.
    if (buffer->handle == NULL)
        return TINY_UI_INVALID_ARGUMENT;

    request.memory_handle = buffer->handle;
    TINY_UI_RETURN_ERROR(tiny_ui_kernel(TINY_UI_UNMAP, &request));

    // The mapping is gone; drop the handle.
    buffer->handle = NULL;

    return TINY_UI_SUCCESS;
}
